package cnki.kg.demo;

import cnki.kg.demo.util.StringUtil;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import lombok.Data;
import org.junit.jupiter.api.Test;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.web.WebAppConfiguration;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;


/**
 * Scrapes the classification pages served at {@code http://www.ztflh.com/?c=<id>}.
 *
 * <p>Two independent approaches live here:
 * <ul>
 *   <li>{@link #explainText()} downloads pages by numeric id and extracts code/name pairs with
 *       plain string markers, printing them to standard output;</li>
 *   <li>{@link #explain2()} walks the tree recursively with HtmlUnit, following each entry's link,
 *       and inserts every node into the {@code zhtclass} table.</li>
 * </ul>
 */
@SpringBootTest
@WebAppConfiguration
public class ZTFL {
    /** Common prefix of every page URL; the numeric page id is appended. */
    private static final String PAGE_URL_PREFIX = "http://www.ztflh.com/?c=";
    /** Exclusive upper bound of the page ids scanned by {@link #explainText()} (value kept from the original code). */
    private static final int PAGE_ID_LIMIT = 45837;
    /** Connect and read timeout, in milliseconds, for the plain-HTTP download. */
    private static final int TIMEOUT_MILLIS = 10000;

    @Autowired
    @Qualifier("mysqlJdbcTemplate")
    JdbcTemplate mysqlJdbcTemplate;
    // The markers below can be adjusted to match the page markup of the site.
    String nullFlag = "没有下级分类";
    String firstFlagAlph = "<ul id=\"list\" class=\"cent\" style=\"list-style:none;\"><li>";
    String startFlagAlph = "<span class=\"code\">";
    String endFlagAlph = "</span>";
    String startFlagName = "\">";
    String endFlagName = "</a>";

    /**
     * Downloads every page id in {@code [1, PAGE_ID_LIMIT)} and prints the code/name pairs found on each.
     */
    @Test
    public void explainText() {
        for (int i = 1; i < PAGE_ID_LIMIT; i++) {
            System.out.println(i);
            // No explicit System.gc() here: forcing a collection per page only slows the scan down.
            explainHtml(i, "");
        }
        System.out.println("");
    }

    /**
     * Downloads one page and prints every {@code code==>:name} pair found in it.
     *
     * <p>Nothing is printed for a page that contains {@link #nullFlag}. A download failure is
     * reported on standard output and otherwise ignored so that the caller's scan can continue.
     *
     * @param i          numeric page id appended to the page URL
     * @param parentCode code of the parent entry; currently unused
     */
    private void explainHtml(Integer i, String parentCode) {
        try {
            URLConnection urlConn = new URL(PAGE_URL_PREFIX + i).openConnection();
            urlConn.setReadTimeout(TIMEOUT_MILLIS);
            urlConn.setConnectTimeout(TIMEOUT_MILLIS);
            // This is a read-only request, so doOutput is left at its default (false).

            // Read the whole body; lines are concatenated without separators so that markers
            // are matched regardless of how the markup is wrapped across lines.
            StringBuilder sb = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(urlConn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    sb.append(line);
                }
            }

            if (sb.indexOf(nullFlag) >= 0) {
                return;
            }

            // Start scanning after the list header when it is present, otherwise from the top.
            int listStart = sb.indexOf(firstFlagAlph);
            int cursor = listStart < 0 ? 0 : listStart + firstFlagAlph.length();
            while (true) {
                int codeStart = sb.indexOf(startFlagAlph, cursor);
                if (codeStart < 0) {
                    break;
                }
                codeStart += startFlagAlph.length();
                int codeEnd = sb.indexOf(endFlagAlph, codeStart);
                if (codeEnd < 0) {
                    break; // truncated markup: no closing marker for the code
                }
                int nameStart = sb.indexOf(startFlagName, codeEnd);
                if (nameStart < 0) {
                    break; // code without a following link
                }
                nameStart += startFlagName.length();
                int nameEnd = sb.indexOf(endFlagName, nameStart);
                if (nameEnd < 0) {
                    break; // truncated markup: no closing marker for the name
                }
                String code = sb.substring(codeStart, codeEnd).trim();
                String name = sb.substring(nameStart, nameEnd).trim();
                System.out.println(code + "==>:" + name);
                cursor = nameEnd;
            }
        } catch (IOException e) {
            System.out.println(e.getLocalizedMessage());
        }
    }

    /** One scraped classification entry. */
    @Data
    public class CtreeNode{
        /** Text of the entry's {@code <span>} element. */
        private String code;
        /** Text of the entry's link. */
        private String name;
        /** Code of the entry whose page listed this one; empty for the start page. */
        private String pCode;
        /** {@code href} of the entry's link, as written in the page. */
        private String url;
    }

    /**
     * Crawls the tree starting at page {@code c=1} and stores every entry found.
     */
    @Test
    public void explain2(){
        List<CtreeNode> nodes=new ArrayList<>();
        explain("", PAGE_URL_PREFIX + 1, nodes);
    }

    /**
     * Loads {@code url}, stores every entry listed under the element with id {@code list}, and
     * recurses into each entry's link.
     *
     * <p>HTTP and I/O failures for a page are printed and that page is skipped.
     *
     * @param parentCode code recorded as the parent of every entry found on this page
     * @param url        absolute URL of the page to load
     * @param nodes      scratch list shared across the recursion; each entry is appended,
     *                   inserted, and the list is cleared again before descending
     */
    private void explain(String parentCode,String url,List<CtreeNode> nodes) {
        // Emulate Chrome; CSS and JavaScript are disabled because only the static markup is needed.
        // try-with-resources closes the client (and frees its memory) on every exit path.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setJavaScriptEnabled(false);

            HtmlPage page = webClient.getPage(url);
            List<HtmlElement> liList = page.getByXPath("//*[@id=\"list\"]/li[*]");
            for (HtmlElement li : liList) {
                // Query relative to this <li>. An absolute path indexed by loop position would hit
                // the wrong element whenever the li[*] filter skipped an earlier <li>.
                List<HtmlElement> spanList = li.getByXPath("./span");
                List<HtmlElement> linkList = li.getByXPath("./a");
                if (spanList.isEmpty() || linkList.isEmpty()) {
                    continue; // not a code/name entry
                }
                HtmlElement link = linkList.get(0);
                String code = spanList.get(0).getTextContent();
                String name = link.getTextContent();
                String nextUrl = link.getAttribute("href");

                System.out.println(code+"==>"+name+"==>"+nextUrl);
                CtreeNode nodeItem = new CtreeNode();
                nodeItem.setCode(code);
                nodeItem.setName(name);
                nodeItem.setPCode(parentCode);
                nodeItem.setUrl(nextUrl);
                // Flush immediately so the shared list is empty again before recursing.
                nodes.add(nodeItem);
                batchInsert(nodes);
                nodes.clear();

                if (StringUtil.isNotBlank(nextUrl)) {
                    String absoluteUrl = resolveUrl(page, nextUrl);
                    if (absoluteUrl != null) {
                        explain(code, absoluteUrl, nodes);
                    }
                }
            }
        } catch (FailingHttpStatusCodeException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Resolves {@code href} against {@code page} so that relative links can be loaded.
     *
     * @return the absolute URL, or {@code null} when {@code href} cannot be turned into a URL
     */
    private String resolveUrl(HtmlPage page, String href) {
        try {
            return page.getFullyQualifiedUrl(href).toString();
        } catch (IOException e) { // MalformedURLException
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Inserts the given entries into {@code zhtclass (code, name, parentCode)}.
     *
     * <p>Values are bound as statement parameters rather than formatted into the SQL text, so
     * scraped text containing quotes cannot break or alter the statement. Does nothing for a
     * {@code null} or empty list.
     *
     * @param nodes entries to insert
     */
    private void batchInsert(List<CtreeNode> nodes){
        if (nodes == null || nodes.isEmpty()) {
            return;
        }
        List<Object[]> rows = nodes.stream()
                .map(n -> new Object[]{n.getCode(), n.getName(), n.getPCode()})
                .collect(Collectors.toList());
        mysqlJdbcTemplate.batchUpdate("insert into zhtclass (code,name,parentCode) values (?,?,?)", rows);
    }
}
